-------------------------------------------------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\logs/misc_data_prep.log
  log type:  text
 opened on:  16 Jul 2024, 15:51:09

. 
. /*******************************************************************************
> misc_data_prep.do
> 
> This code prepares miscellaneous data sets for later merging
> *******************************************************************************/
. 
. **** Firm-level enforcement
. import delim "${rawdatapath}Indicted_NPIsxfirm.csv", clear
(encoding automatically selected: UTF-8)
(11 vars, 87 obs)

. 
. drop if mi(npi)
(0 observations deleted)

. 
. * Find enforcement date
. gen civdate2 = date(civildate, "MDY")
(39 missing values generated)

. gen crimdate2 = date(criminaldate, "MDY")
(44 missing values generated)

. 
. gen firmenforcementdate = min(civdate2, crimdate2)

. gen indictednpi = 1 

. 
. ** Tag civil
. * There's some "BOTH" -- treat it as criminal  
. gen firmenforcementtype_civil = (criminalvscivil == "Civil")

. 
. keep npi firmenforcementdate firmenforcementtype_civil indictednpi

. 
. save "${cleandatapath}Indicted_NPIs.dta", replace 
file C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\data/cleaned/Indicted_NPIs.dta saved

. 
. ************************************************************************
. **** District-level enforcement
. 
. import delim "${rawdatapath}Treatment_Dates.csv", clear  
(encoding automatically selected: UTF-8)
(6 vars, 34 obs)

. drop if mi(district)
(0 observations deleted)

. 
. gen criminal=(district_casetype=="Criminal")

. drop district_casetype

. 
. gen treatmentdate_num=date(treatmentdate, "MDY")
(3 missing values generated)

. gen backupdate_num=date(backupdate, "MDY")
(1 missing value generated)

. 
. keep treatmentdate_num backupdate_num casecount district criminal

. reshape wide treatmentdate_num backupdate_num casecount, i(district) j(criminal)
(j = 0 1)

Data                               Long   ->   Wide
-----------------------------------------------------------------------------
Number of observations               34   ->   26          
Number of variables                   5   ->   7           
j variable (2 values)          criminal   ->   (dropped)
xij variables:
                      treatmentdate_num   ->   treatmentdate_num0 treatmentdate_num1
                         backupdate_num   ->   backupdate_num0 backupdate_num1
                              casecount   ->   casecount0 casecount1
-----------------------------------------------------------------------------

. rename (*0 *1) (civ_* crim_*)

. rename *_num *

. 
. save "${cleandatapath}DOJ_data_district.dta", replace
file C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\data/cleaned/DOJ_data_district.dta saved

. 
. ************************************************************************
. **** Crosswalk from DOJ Districts to Counties
. 
. use "${rawdatapath}zip_doj_crosswalk.dta", clear

. save "${cleandatapath}zip_doj_crosswalk.dta", replace // Copy to cleaned
file C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\data/cleaned/zip_doj_crosswalk.dta saved

. 
. bysort fipscounty: keep if _n==1
(36,244 observations deleted)

. keep fipscounty fipsstate district state

. rename (fipscounty fipsstate state) (county state state_abbrev)

. save "${cleandatapath}DOJcounty_Xwalk.dta", replace
file C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\data/cleaned/DOJcounty_Xwalk.dta saved

. 
. ************************************************************************
. **** Crosswalk from DOJ Districts to States
. 
. use "${rawdatapath}zip_doj_crosswalk.dta", clear

. keep if statewide_district==1
(28,033 observations deleted)

. keep fipsstate state district

. rename (fipsstate state district) (state state_abbrev statedistrict)

. duplicates drop

Duplicates in terms of all variables

(11,393 observations deleted)

. save "${cleandatapath}DOJstate_Xwalk.dta", replace
file C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\data/cleaned/DOJstate_Xwalk.dta saved

. 
. ************************************************************************
. **** Enforcement Capacity Data
. 
. clear

. import excel "${rawdatapath}WorkHours.xlsx", sheet("Sheet1") firstrow
(24 vars, 3,136 obs)

. drop N-X

. rename (District Year) (district year)

. destring DistrictCourtCriminal DistrictCourtCivil, replace force
DistrictCourtCriminal: contains nonnumeric characters; replaced as double
(2 missing values generated)
DistrictCourtCivil: contains nonnumeric characters; replaced as double
(3 missing values generated)

. save "${cleandatapath}WorkHours.dta", replace
file C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\data/cleaned/WorkHours.dta saved

. 
. 
. ************************************************************************/
. **** Public Use Data
. 
. forvalues y = 2013/2017 {
  2.         clear
  3.         import delimited "${rawdatapath}MUP`y'.csv", stringcols(_all)
  4.         gen year=`y'
  5.         tempfile MUP`y'
  6.         save `MUP`y'', replace
  7. }
(encoding automatically selected: ISO-8859-1)
(29 vars, 29,934 obs)
(file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000001.tmp not found)
file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000001.tmp saved as .dta format
(encoding automatically selected: ISO-8859-1)
(29 vars, 30,074 obs)
(file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000002.tmp not found)
file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000002.tmp saved as .dta format
(encoding automatically selected: ISO-8859-1)
(29 vars, 29,914 obs)
(file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000003.tmp not found)
file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000003.tmp saved as .dta format
(encoding automatically selected: ISO-8859-1)
(29 vars, 29,655 obs)
(file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000004.tmp not found)
file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000004.tmp saved as .dta format
(encoding automatically selected: ISO-8859-1)
(29 vars, 29,629 obs)
(file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000005.tmp not found)
file C:\Users\riley\AppData\Local\Temp\ST_5c7c_000005.tmp saved as .dta format

. clear

. forvalues y = 2013/2017 {
  2.         append using `MUP`y''
  3. }
(variable rndrng_prvdr_city was str25, now str26 to accommodate using data's values)
(variable rndrng_prvdr_first_name was str8, now str9 to accommodate using data's values)
(variable rndrng_prvdr_st1 was str52, now str53 to accommodate using data's values)
(variable hcpcs_desc was str87, now str90 to accommodate using data's values)

. 
. foreach var in tot_benes tot_srvcs tot_bene_day_srvcs avg_sbmtd_chrg avg_mdcr_alowd_amt avg_mdcr_pymt_amt avg_mdcr_stdzd_amt {
  2.         destring `var', replace
  3. }
tot_benes: all characters numeric; replaced as long
tot_srvcs: all characters numeric; replaced as double
tot_bene_day_srvcs: all characters numeric; replaced as long
avg_sbmtd_chrg: all characters numeric; replaced as double
avg_mdcr_alowd_amt: all characters numeric; replaced as double
avg_mdcr_pymt_amt: all characters numeric; replaced as double
avg_mdcr_stdzd_amt: all characters numeric; replaced as double

. rename rndrng_* *

. 
. bysort npi hcpcs_cd year: gen ct=_N

. foreach var in tot_benes tot_srvcs tot_bene_day_srvcs avg_sbmtd_chrg avg_mdcr_alowd_amt avg_mdcr_pymt_amt avg_mdcr_stdzd_amt {
  2.         bysort npi hcpcs_cd year: replace `var'=`var'[_n]+`var'[_n-1] if _n!=1 & ct==2
  3. }
(3 real changes made)
(3 real changes made)
(3 real changes made)
(3 real changes made)
(3 real changes made)
(3 real changes made)
(3 real changes made)

. bysort npi hcpcs_cd year: drop if _n==1 & ct==2
(3 observations deleted)

. drop ct prvdr_type prvdr_cntry prvdr_type prvdr_mdcr_prtcptg_ind hcpcs_desc hcpcs_drug_ind place_of_srvc

. 
. reshape wide tot_benes tot_srvcs tot_bene_day_srvcs avg_sbmtd_chrg avg_mdcr_alowd_amt avg_mdcr_pymt_amt avg_mdcr_stdzd_amt, i(npi year) j
> (hcpcs_cd) string
(j = A0425 A0426 A0427 A0428 A0429)

Data                               Long   ->   Wide
-----------------------------------------------------------------------------
Number of observations          149,203   ->   46,921      
Number of variables                  24   ->   51          
j variable (5 values)          hcpcs_cd   ->   (dropped)
xij variables:
                              tot_benes   ->   tot_benesA0425 tot_benesA0426 ... tot_benesA0429
                              tot_srvcs   ->   tot_srvcsA0425 tot_srvcsA0426 ... tot_srvcsA0429
                     tot_bene_day_srvcs   ->   tot_bene_day_srvcsA0425 tot_bene_day_srvcsA0426 ... tot_bene_day_srvcsA0429
                         avg_sbmtd_chrg   ->   avg_sbmtd_chrgA0425 avg_sbmtd_chrgA0426 ... avg_sbmtd_chrgA0429
                     avg_mdcr_alowd_amt   ->   avg_mdcr_alowd_amtA0425 avg_mdcr_alowd_amtA0426 ... avg_mdcr_alowd_amtA0429
                      avg_mdcr_pymt_amt   ->   avg_mdcr_pymt_amtA0425 avg_mdcr_pymt_amtA0426 ... avg_mdcr_pymt_amtA0429
                     avg_mdcr_stdzd_amt   ->   avg_mdcr_stdzd_amtA0425 avg_mdcr_stdzd_amtA0426 ... avg_mdcr_stdzd_amtA0429
-----------------------------------------------------------------------------

. 
. save "${cleandatapath}MUP.dta", replace
file C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\data/cleaned/MUP.dta saved

. 
. log close
      name:  <unnamed>
       log:  C:\Users\riley\Dropbox\DialysisAmbulanceFraud\JPE Submission\Final Submission\replication\logs/misc_data_prep.log
  log type:  text
 closed on:  16 Jul 2024, 15:51:16
-------------------------------------------------------------------------------------------------------------------------------------------
